#source('http://bioconductor.org/biocLite.R')
#biocLite('phyloseq')
library(phyloseq)
library(ggplot2)
library(plyr)
library(dplyr)
library(Rmisc)
library(DESeq2)
library(doParallel)
library(vegan)
library(grid)
library(gridExtra)
library(reshape2)
# Load biom file. 
biom <- import_biom("OTU_table.biom", "~/Dropbox/clado-manuscript/Nephele/PipelineResults_NMEPINZ20QK1/nephele_outputs/tree.tre", parseFunction=parse_taxonomy_greengenes)
biom
# Load and merge sample metadata with read data (biom). 
sam.data <- read.csv(file="sample.data.csv", row.names=1, header=TRUE)
sam.data$Date <- as.factor(sam.data$Date)
sam.data$DateSite <- paste(sam.data$Date, sam.data$Site)
sample_data(biom) <- sam.data
sample_data(biom)
## Sample Data:        [52 samples by 6 sample variables]:
##        TreatmentGroup  Site Date                       Description
## C178N1          Early North  178 Sample of day 178 at site North 1
## C178P1          Early Point  178 Sample of day 178 at site Point 1
## C185P2          Early Point  185 Sample of day 185 at site Point 2
## C206N2           Late North  206 Sample of day 206 at site North 2
## C206P1           Late Point  206 Sample of day 206 at site Point 1
## C206P2           Late Point  206 Sample of day 206 at site Point 2
## C214P1           Late Point  214 Sample of day 214 at site Point 1
## C214P2           Late Point  214 Sample of day 214 at site Point 2
## C214P3           Late Point  214 Sample of day 214 at site Point 3
## C214S1           Late South  214 Sample of day 214 at site South 1
## C214S2           Late South  214 Sample of day 214 at site South 2
## C214S3           Late South  214 Sample of day 214 at site South 3
## C185P1          Early Point  185 Sample of day 185 at site Point 1
## C185P3          Early Point  185 Sample of day 185 at site Point 3
## C199P3           Late Point  199 Sample of day 199 at site Point 3
## C199S2           Late South  199 Sample of day 199 at site South 2
## C199S3           Late South  199 Sample of day 199 at site South 3
## C206N1           Late North  206 Sample of day 206 at site North 1
## C178P2          Early Point  178 Sample of day 178 at site Point 2
## C199N3           Late North  199 Sample of day 199 at site North 3
## C206S1           Late South  206 Sample of day 206 at site South 1
## C214N3           Late North  214 Sample of day 214 at site North 3
## C199N2           Late North  199 Sample of day 199 at site North 2
## C206N3           Late North  206 Sample of day 206 at site North 3
## C206S2           Late South  206 Sample of day 206 at site South 2
## C199N1           Late North  199 Sample of day 199 at site North 1
## C199P1           Late Point  199 Sample of day 199 at site Point 1
## C199S1           Late South  199 Sample of day 199 at site South 1
## C214N1           Late North  214 Sample of day 214 at site North 1
## C172P1          Early Point  172 Sample of day 172 at site Point 1
## C199P2           Late Point  199 Sample of day 199 at site Point 2
## C172N3          Early North  172 Sample of day 172 at site North 3
## C172S3          Early South  172 Sample of day 172 at site South 3
## C178S2          Early South  178 Sample of day 178 at site South 2
## C178P3          Early Point  178 Sample of day 178 at site Point 3
## C178S3          Early South  178 Sample of day 178 at site South 3
## C172N1          Early North  172 Sample of day 172 at site North 1
## C172S1          Early South  172 Sample of day 172 at site South 1
## C178N3          Early North  178 Sample of day 178 at site North 3
## C185N2          Early North  185 Sample of day 185 at site North 2
## C185N3          Early North  185 Sample of day 185 at site North 3
## C185S3          Early South  185 Sample of day 185 at site South 3
## C214N2           Late North  214 Sample of day 214 at site North 2
## C172P2          Early Point  172 Sample of day 172 at site Point 2
## C185S2          Early South  185 Sample of day 185 at site South 2
## C172P3          Early Point  172 Sample of day 172 at site Point 3
## C185N1          Early North  185 Sample of day 185 at site North 1
## C172N2          Early North  172 Sample of day 172 at site North 2
## C178S1          Early South  178 Sample of day 178 at site South 1
## C185S1          Early South  185 Sample of day 185 at site South 1
## C172S2          Early South  172 Sample of day 172 at site South 2
## C178N2          Early North  178 Sample of day 178 at site North 2
##        SampleID.1  DateSite
## C178N1     C178N1 178 North
## C178P1     C178P1 178 Point
## C185P2     C185P2 185 Point
## C206N2     C206N2 206 North
## C206P1     C206P1 206 Point
## C206P2     C206P2 206 Point
## C214P1     C214P1 214 Point
## C214P2     C214P2 214 Point
## C214P3     C214P3 214 Point
## C214S1     C214S1 214 South
## C214S2     C214S2 214 South
## C214S3     C214S3 214 South
## C185P1     C185P1 185 Point
## C185P3     C185P3 185 Point
## C199P3     C199P3 199 Point
## C199S2     C199S2 199 South
## C199S3     C199S3 199 South
## C206N1     C206N1 206 North
## C178P2     C178P2 178 Point
## C199N3     C199N3 199 North
## C206S1     C206S1 206 South
## C214N3     C214N3 214 North
## C199N2     C199N2 199 North
## C206N3     C206N3 206 North
## C206S2     C206S2 206 South
## C199N1     C199N1 199 North
## C199P1     C199P1 199 Point
## C199S1     C199S1 199 South
## C214N1     C214N1 214 North
## C172P1     C172P1 172 Point
## C199P2     C199P2 199 Point
## C172N3     C172N3 172 North
## C172S3     C172S3 172 South
## C178S2     C178S2 178 South
## C178P3     C178P3 178 Point
## C178S3     C178S3 178 South
## C172N1     C172N1 172 North
## C172S1     C172S1 172 South
## C178N3     C178N3 178 North
## C185N2     C185N2 185 North
## C185N3     C185N3 185 North
## C185S3     C185S3 185 South
## C214N2     C214N2 214 North
## C172P2     C172P2 172 Point
## C185S2     C185S2 185 South
## C172P3     C172P3 172 Point
## C185N1     C185N1 185 North
## C172N2     C172N2 172 North
## C178S1     C178S1 178 South
## C185S1     C185S1 185 South
## C172S2     C172S2 172 South
## C178N2     C178N2 178 North
# Normalize by relative abundance. 
biom.relabund <- transform_sample_counts(biom, function(x) x / sum(x))
# Ordination plot, k = 3. 
ordNMDS.k3 <- ordinate(biom.relabund, method="NMDS", distance="bray", k=3)
## Run 0 stress 0.07869986 
## Run 1 stress 0.07869949 
## ... New best solution
## ... Procrustes: rmse 0.0002305481  max resid 0.001105219 
## ... Similar to previous best
## Run 2 stress 0.07869962 
## ... Procrustes: rmse 0.0002017883  max resid 0.001019135 
## ... Similar to previous best
## Run 3 stress 0.07869844 
## ... New best solution
## ... Procrustes: rmse 0.0004553477  max resid 0.001521332 
## ... Similar to previous best
## Run 4 stress 0.08334584 
## Run 5 stress 0.07874264 
## ... Procrustes: rmse 0.002511844  max resid 0.01380174 
## Run 6 stress 0.07869964 
## ... Procrustes: rmse 0.0005900723  max resid 0.002571066 
## ... Similar to previous best
## Run 7 stress 0.08334478 
## Run 8 stress 0.07872072 
## ... Procrustes: rmse 0.001339293  max resid 0.005271881 
## ... Similar to previous best
## Run 9 stress 0.07870018 
## ... Procrustes: rmse 0.0006557135  max resid 0.002997354 
## ... Similar to previous best
## Run 10 stress 0.07869808 
## ... New best solution
## ... Procrustes: rmse 0.0001766944  max resid 0.0005861921 
## ... Similar to previous best
## Run 11 stress 0.07869856 
## ... Procrustes: rmse 0.0002775179  max resid 0.001302053 
## ... Similar to previous best
## Run 12 stress 0.08337886 
## Run 13 stress 0.07869809 
## ... Procrustes: rmse 0.0001472413  max resid 0.0003891141 
## ... Similar to previous best
## Run 14 stress 0.07869902 
## ... Procrustes: rmse 0.0003348988  max resid 0.001535 
## ... Similar to previous best
## Run 15 stress 0.07869916 
## ... Procrustes: rmse 0.0003981774  max resid 0.001889146 
## ... Similar to previous best
## Run 16 stress 0.08334287 
## Run 17 stress 0.08804081 
## Run 18 stress 0.07869856 
## ... Procrustes: rmse 0.0002547032  max resid 0.001142475 
## ... Similar to previous best
## Run 19 stress 0.08695913 
## Run 20 stress 0.07906707 
## ... Procrustes: rmse 0.00900781  max resid 0.03814892 
## *** Solution reached
ord.k3 <- plot_ordination(biom.relabund, ordNMDS.k3, shape="Site", color = "Date") + geom_point(size=2)
ord.k3 + labs(title = "Cladophora, 2014") + theme_bw() + scale_colour_hue(h=c(300, 500))

# PERMANOVA. 
df = as(sample_data(biom), "data.frame")
d = phyloseq::distance(biom, "bray")
clado.adonis = adonis(d ~ Date*Site, df)
clado.adonis
## 
## Call:
## adonis(formula = d ~ Date * Site, data = df) 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##           Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## Date       5    3.1945 0.63890 17.4143 0.41338  0.001 ***
## Site       2    1.5561 0.77804 21.2068 0.20136  0.001 ***
## Date:Site 10    1.7298 0.17298  4.7148 0.22384  0.001 ***
## Residuals 34    1.2474 0.03669         0.16142           
## Total     51    7.7278                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
biom.rich.est <- estimate_richness(biom, measures = NULL)
biom.rich.est$SampleID.1 <- row.names(biom.rich.est)
biom.rich.est <- merge(biom.rich.est, sam.data, by = "SampleID.1")
biom.rich.est$Date <- as.character(biom.rich.est$Date)
biom.rich.est$Date <- as.numeric(biom.rich.est$Date)
head(biom.rich.est)
##   SampleID.1 Observed    Chao1  se.chao1      ACE   se.ACE  Shannon
## 1     C172N1     5139 6770.782  99.67941 7316.921 48.50633 5.444561
## 2     C172N2     5236 7017.650 107.98761 7420.840 47.98329 5.649933
## 3     C172N3     6890 8499.605  90.45120 9053.801 49.75824 5.980043
## 4     C172P1     2936 4855.835 138.81016 5427.422 47.24285 4.851552
## 5     C172P2     4833 7043.680 130.90171 7619.611 51.59008 5.306940
## 6     C172P3     3750 5099.247  96.43281 5368.421 41.19882 4.824680
##     Simpson InvSimpson    Fisher TreatmentGroup  Site Date
## 1 0.9874631   79.76426  896.0810          Early North  172
## 2 0.9892749   93.23962 1001.0057          Early North  172
## 3 0.9899773   99.77399 1361.3171          Early North  172
## 4 0.9767986   43.10086  520.1978          Early Point  172
## 5 0.9803746   50.95444  904.5831          Early Point  172
## 6 0.9400545   16.68183  655.5148          Early Point  172
##                         Description  DateSite
## 1 Sample of day 172 at site North 1 172 North
## 2 Sample of day 172 at site North 2 172 North
## 3 Sample of day 172 at site North 3 172 North
## 4 Sample of day 172 at site Point 1 172 Point
## 5 Sample of day 172 at site Point 2 172 Point
## 6 Sample of day 172 at site Point 3 172 Point
# Plot observed richness. 
biom.rich.est.obs <- summarySE(biom.rich.est, measurevar="Observed", groupvars=c("Date","Site"))
p.obs <- ggplot(biom.rich.est.obs, aes(x=Date, y=Observed, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=Observed-se, ymax=Observed+se)) + geom_line() + scale_colour_hue(h=c(400, 120))
p.obs + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Plot Shanon index richness. 
biom.rich.est.sha <- summarySE(biom.rich.est, measurevar="Shannon", groupvars=c("Date","Site"))
p.sha <- ggplot(biom.rich.est.sha, aes(x=Date, y=Shannon, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=Shannon-se, ymax=Shannon+se)) + geom_line() + scale_colour_hue(h=c(400, 120))
p.sha + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Find top 30 genera and subset biom.relabund. 
sort.genera <- sort(tapply(taxa_sums(biom.relabund), tax_table(biom.relabund)[, "Genus"], sum), TRUE)
top.genera <- sort.genera[1:30]
top.genera.list <- names(top.genera)
biom.relabund.subset = subset_taxa(biom.relabund, Genus %in% top.genera.list)
biom.relabund.subset.taxa <- subset_taxa(biom.relabund.subset, Genus %in% as.factor(top.genera.list))
biom.relabund.subset.taxa
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 9093 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 9093 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 9093 tips and 9089 internal nodes ]
relabund.top.genera <- psmelt(biom.relabund.subset.taxa)
relabund.top.genera.genus <- relabund.top.genera%>%
  group_by(Sample, Genus)%>%
  mutate(GenusAbundance = sum(Abundance))%>%
  distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Phylum, Family, Genus)
head(relabund.top.genera.genus)
## Source: local data frame [6 x 10]
## Groups: Sample, Genus [6]
## 
##   Sample GenusAbundance TreatmentGroup   Site   Date         Phylum
##    <chr>          <dbl>         <fctr> <fctr> <fctr>         <fctr>
## 1 C178S2     0.14728018          Early  South    178 Proteobacteria
## 2 C206N2     0.07541493           Late  North    206       [Thermi]
## 3 C199S1     0.12408425           Late  South    199 Proteobacteria
## 4 C185S1     0.12728636          Early  South    185 Proteobacteria
## 5 C199S3     0.10486769           Late  South    199 Proteobacteria
## 6 C172S1     0.10894857          Early  South    172  Bacteroidetes
## # ... with 4 more variables: Family <fctr>, Genus <fctr>, Sample <chr>,
## #   Genus <fctr>
# Summary of genus abundance of top 30 genera. 
relabund.top.genera.genus.est <- summarySE(relabund.top.genera.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(relabund.top.genera.genus.est)
##    Site Date         Genus N GenusAbundance           sd           se
## 1 North  172   Armatimonas 3   0.0082179013 0.0037601213 0.0021709070
## 2 North  172  Bdellovibrio 3   0.0023540936 0.0001963925 0.0001133873
## 3 North  172    Cellvibrio 3   0.0058399987 0.0066066072 0.0038143264
## 4 North  172          CM44 3   0.0088329518 0.0005298073 0.0003058844
## 5 North  172    Crenothrix 3   0.0008092387 0.0004823472 0.0002784833
## 6 North  172 Dechloromonas 3   0.0019270556 0.0026318679 0.0015195096
##             ci
## 1 0.0093406591
## 2 0.0004878661
## 3 0.0164117220
## 4 0.0013161143
## 5 0.0011982168
## 6 0.0065379223
relabund.top.genera.genus.est$Date <- as.character(relabund.top.genera.genus.est$Date)
relabund.top.genera.genus.est$Date <- as.numeric(relabund.top.genera.genus.est$Date)
# Plot summary of genus abundance of top 30 genera. 
p <- ggplot(relabund.top.genera.genus.est, aes(x=Date, y=GenusAbundance, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) +  geom_line() + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Find methanotrophic bacteria by genus. 
methanolist <- read.table(file = "taxa-of-interest/methanos.txt")
methanolist <- as.vector(methanolist$V1)
biom.relabund.methanos <- subset_taxa(biom.relabund, Genus %in% as.factor(methanolist))
biom.relabund.methanos
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 567 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 567 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 567 tips and 566 internal nodes ]
relabund.methanos <- psmelt(biom.relabund.methanos)
relabund.methanos.genus <- relabund.methanos%>%
  group_by(Sample, Genus)%>%
  mutate(GenusAbundance = sum(Abundance))%>%
  distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Phylum, Family, Genus)
relabund.methanos.genus.est <- summarySE(relabund.methanos.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(relabund.methanos.genus.est)
##    Site Date            Genus N GenusAbundance           sd           se
## 1 North  172       Crenothrix 3   8.092387e-04 4.823472e-04 2.784833e-04
## 2 North  172      Methylibium 3   6.600147e-04 3.800437e-04 2.194183e-04
## 3 North  172    Methylocaldum 3   7.717447e-05 5.988775e-05 3.457621e-05
## 4 North  172 Methylomicrobium 3   0.000000e+00 0.000000e+00 0.000000e+00
## 5 North  172     Methylomonas 3   1.097031e-04 2.736128e-06 1.579704e-06
## 6 North  172     Methylosinus 3   4.677898e-05 2.432451e-05 1.404376e-05
##             ci
## 1 1.198217e-03
## 2 9.440809e-04
## 3 1.487694e-04
## 4 0.000000e+00
## 5 6.796919e-06
## 6 6.042543e-05
relabund.methanos.genus.est$Date <- as.character(relabund.methanos.genus.est$Date)
relabund.methanos.genus.est$Date <- as.numeric(relabund.methanos.genus.est$Date)
# Plot summary of genus abundance of methanotrophic genera. 
p <- ggplot(relabund.methanos.genus.est, aes(x=Date, y=GenusAbundance, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) +  geom_line() + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Find all genera
all.genera <- sort(get_taxa_unique(biom.relabund, "Genus"), decreasing=FALSE)
biom.relabund.all.genera <- subset_taxa(biom.relabund, Genus %in% as.factor(all.genera))
biom.relabund.all.genera <- psmelt(biom.relabund.all.genera)
biom.relabund.all.genera.genus <- biom.relabund.all.genera%>%
  group_by(Sample, Genus)%>%
  mutate(GenusAbundance = sum(Abundance))%>%
  distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
biom.relabund.all.genera.genus.est <- summarySE(biom.relabund.all.genera.genus, measurevar="GenusAbundance", groupvars=c("Site","Date", "Genus"))
head(biom.relabund.all.genera.genus.est)
##    Site Date           Genus N GenusAbundance           sd           se
## 1 North  172             A17 3   1.044934e-05 7.240333e-06 4.180208e-06
## 2 North  172   Achromobacter 3   1.205785e-06 2.088482e-06 1.205785e-06
## 3 North  172 Acidaminobacter 3   2.030206e-05 3.516421e-05 2.030206e-05
## 4 North  172      Acidocella 3   0.000000e+00 0.000000e+00 0.000000e+00
## 5 North  172      Acidovorax 3   4.023211e-05 3.508560e-05 2.025668e-05
## 6 North  172   Acinetobacter 3   1.395043e-04 1.803168e-04 1.041059e-04
##             ci
## 1 1.798598e-05
## 2 5.188076e-06
## 3 8.735273e-05
## 4 0.000000e+00
## 5 8.715747e-05
## 6 4.479317e-04
biom.relabund.all.genera.genus.est$Date <- as.character(biom.relabund.all.genera.genus.est$Date)
biom.relabund.all.genera.genus.est$Date <- as.numeric(biom.relabund.all.genera.genus.est$Date)
p <- ggplot(biom.relabund.all.genera.genus.est, aes(x=Date, y=GenusAbundance, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) +  geom_line() + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# We want to calculate the total relative abundance of each phylum. 
# "Melt" the phyloseq data into a dataframe and then take the top X most abundant phyla. 
biom.melt <- psmelt(biom.relabund)
biom.melt.sorted <- biom.melt %>%
  group_by(Sample,Phylum) %>%
  summarize(PhyAbund = sum(Abundance))%>%
  group_by(Phylum)%>%
  summarize(MeanPhyAbund = mean(PhyAbund))%>%
  arrange(-MeanPhyAbund)
# List of nPhyla top most abundant phyla. 
nPhyla =16
PhylumList <- biom.melt.sorted[1:nPhyla,1]
PhylumList <- PhylumList[is.na(PhylumList)==FALSE,]
PhylumList <- levels(droplevels(as.factor(PhylumList$Phylum)))
PhylumList
##  [1] "[Thermi]"         "Acidobacteria"    "Actinobacteria"  
##  [4] "Armatimonadetes"  "Bacteroidetes"    "Chlorobi"        
##  [7] "Chloroflexi"      "Cyanobacteria"    "Gemmatimonadetes"
## [10] "GN02"             "Planctomycetes"   "Proteobacteria"  
## [13] "SR1"              "TM7"              "Verrucomicrobia"
# Subset biom.melt for phyla. 
biom.subset <- subset_taxa(biom.relabund, Phylum %in% PhylumList)
biom.subset.melt <- psmelt(biom.subset)
biom.subset.melt.sorted = biom.subset.melt %>%
  group_by(Sample,Site,Date,Phylum) %>%
  summarize(PhyAbund = sum(Abundance))
# Summarize phylum abundances. 
biom.subset.melt.sorted.est <- summarySE(biom.subset.melt.sorted, measurevar="PhyAbund", groupvars=c("Site","Date", "Phylum"))
head(biom.subset.melt.sorted.est)
##    Site Date          Phylum N    PhyAbund           sd           se
## 1 North  172        [Thermi] 3 0.035290992 0.0143074443 0.0082604068
## 2 North  172   Acidobacteria 3 0.053632305 0.0245078281 0.0141496011
## 3 North  172  Actinobacteria 3 0.042155944 0.0102741604 0.0059317892
## 4 North  172 Armatimonadetes 3 0.008399230 0.0037857001 0.0021856750
## 5 North  172   Bacteroidetes 3 0.396831763 0.0276493911 0.0159633834
## 6 North  172        Chlorobi 3 0.000709788 0.0005910336 0.0003412334
##            ci
## 1 0.035541662
## 2 0.060880820
## 3 0.025522429
## 4 0.009404200
## 5 0.068684895
## 6 0.001468209
biom.subset.melt.sorted.est$Date <- as.character(biom.subset.melt.sorted.est$Date)
biom.subset.melt.sorted.est$Date <- as.numeric(biom.subset.melt.sorted.est$Date)
# Plot top phyla abundances. 
p <- ggplot(biom.subset.melt.sorted.est, aes(x=Date, y=PhyAbund, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=PhyAbund-se, ymax=PhyAbund+se)) +  geom_line() + facet_wrap(~Phylum, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400,120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10)) + labs(x="Date",y="Relative Abundance") 

# Find top 30 classes and subset biom.relabund. 
sort.classes <- sort(tapply(taxa_sums(biom.relabund), tax_table(biom.relabund)[, "Class"], sum), TRUE)
top.classes <- sort.classes[1:30]
top.classes.list <- names(top.classes)
biom.relabund.subset = subset_taxa(biom.relabund, Class %in% top.classes.list)
biom.relabund.subset.taxa <- subset_taxa(biom.relabund.subset, Class %in% as.factor(top.classes.list))
biom.relabund.subset.taxa
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 38156 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 38156 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 38156 tips and 38142 internal nodes ]
relabund.top.classes <- psmelt(biom.relabund.subset.taxa)
relabund.top.classes.class <- relabund.top.classes%>%
  group_by(Sample, Class)%>%
  mutate(ClassAbundance = sum(Abundance))%>%
  distinct(Sample, ClassAbundance, TreatmentGroup, Site, Date, Phylum, Family, Class)
relabund.top.classes.class.print <- summarySE(relabund.top.classes.class, measurevar="ClassAbundance", groupvars=c("Class"))
relabund.top.classes.class.print[,c("Class","N","ClassAbundance")]
##                    Class    N ClassAbundance
## 1   [Chloracidobacteria]  156   0.0242780076
## 2         [Pedosphaerae]  312   0.0008576936
## 3          [Saprospirae]  156   0.2600740284
## 4         Acidimicrobiia  572   0.0142314956
## 5        Acidobacteria-6  156   0.0087173757
## 6         Actinobacteria 1092   0.0008919558
## 7    Alphaproteobacteria 1196   0.1189849423
## 8          Armatimonadia   52   0.0043355902
## 9            Bacteroidia  468   0.0016047794
## 10                 BD1-5   52   0.0036947445
## 11    Betaproteobacteria  728   0.1394166181
## 12           Chloroplast  156   0.0387461867
## 13            Clostridia  624   0.0008200079
## 14            Cytophagia  260   0.0479689304
## 15            Deinococci  156   0.0590344529
## 16   Deltaproteobacteria 1352   0.0199104462
## 17        Flavobacteriia  208   0.0494758417
## 18   Gammaproteobacteria 1664   0.0668946728
## 19      Gemmatimonadetes  156   0.0019508196
## 20                 OM190   52   0.0040714493
## 21                 OPB56   52   0.0007609416
## 22         Phycisphaerae  104   0.0016755586
## 23        Planctomycetia  260   0.0069005428
## 24          Solibacteres  208   0.0118832190
## 25      Sphingobacteriia  156   0.0073552263
## 26 Synechococcophycideae  260   0.0354155284
## 27                  TA18   52   0.0015786666
## 28                 TM7-1   52   0.0018488588
## 29      Verrucomicrobiae   52   0.0026770285
## 30                   ZB2   52   0.0007206090
# Summary of class abundance of top 30 classes. 
relabund.top.classes.class.est <- summarySE(relabund.top.classes.class, measurevar="ClassAbundance", groupvars=c("Site","Date", "Class"))
head(relabund.top.classes.class.est)
##    Site Date                Class  N ClassAbundance           sd
## 1 North  172 [Chloracidobacteria]  9    0.039306658 0.0176302507
## 2 North  172       [Pedosphaerae] 18    0.001325860 0.0001686127
## 3 North  172        [Saprospirae]  9    0.191751831 0.0190289005
## 4 North  172       Acidimicrobiia 33    0.038384578 0.0085857752
## 5 North  172      Acidobacteria-6  9    0.003818652 0.0006679931
## 6 North  172       Actinobacteria 63    0.003232369 0.0001236175
##             se           ci
## 1 5.876750e-03 1.355181e-02
## 2 3.974240e-05 8.384913e-05
## 3 6.342967e-03 1.462691e-02
## 4 1.494592e-03 3.044383e-03
## 5 2.226644e-04 5.134650e-04
## 6 1.557434e-05 3.113266e-05
relabund.top.classes.class.est$Date <- as.character(relabund.top.classes.class.est$Date)
relabund.top.classes.class.est$Date <- as.numeric(relabund.top.classes.class.est$Date)
# Plot summary of class abundance of top 30 classes. 
p <- ggplot(relabund.top.classes.class.est, aes(x=Date, y=ClassAbundance, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=ClassAbundance-se, ymax=ClassAbundance+se)) +  geom_line() + facet_wrap(~Class, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Find all classes
all.classes <- sort(get_taxa_unique(biom.relabund, "Class"), decreasing=FALSE)
biom.relabund.all.classes <- subset_taxa(biom.relabund, Class %in% as.factor(all.classes))
biom.relabund.all.classes <- psmelt(biom.relabund.all.classes)
biom.relabund.all.classes.class <- biom.relabund.all.classes%>%
  group_by(Sample, Class)%>%
  mutate(ClassAbundance = sum(Abundance))%>%
  distinct(Sample, ClassAbundance, TreatmentGroup, Site, Date, Family, Class)
biom.relabund.all.classes.class.est <- summarySE(biom.relabund.all.classes.class, measurevar="ClassAbundance", groupvars=c("Site","Date", "Class"))
head(biom.relabund.all.classes.class.est)
##    Site Date                Class N ClassAbundance           sd
## 1 North  172       [Brachyspirae] 6   0.000000e+00 0.000000e+00
## 2 North  172       [Brevinematae] 3   0.000000e+00 0.000000e+00
## 3 North  172 [Chloracidobacteria] 9   3.930666e-02 1.763025e-02
## 4 North  172        [Cloacamonae] 6   0.000000e+00 0.000000e+00
## 5 North  172     [Fimbriimonadia] 3   2.686822e-05 1.191337e-05
## 6 North  172      [Lentisphaeria] 6   1.561697e-06 2.419371e-06
##             se           ci
## 1 0.000000e+00 0.000000e+00
## 2 0.000000e+00 0.000000e+00
## 3 5.876750e-03 1.355181e-02
## 4 0.000000e+00 0.000000e+00
## 5 6.878186e-06 2.959445e-05
## 6 9.877040e-07 2.538974e-06
biom.relabund.all.classes.class.est$Date <- as.character(biom.relabund.all.classes.class.est$Date)
biom.relabund.all.classes.class.est$Date <- as.numeric(biom.relabund.all.classes.class.est$Date)
p <- ggplot(biom.relabund.all.classes.class.est, aes(x=Date, y=ClassAbundance, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=ClassAbundance-se, ymax=ClassAbundance+se)) +  geom_line() + facet_wrap(~Class, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Data downloaded from 
data <- read.csv("north_temperate_lakes_lter__daily_water_temperature_-_lake_mendota.csv", header=T)
head(data)
##   sampledate year4 month daynum depth  wtemp flag_wtemp
## 1 2006-06-27  2006     6    178   0.0 22.205          N
## 2 2006-06-27  2006     6    178   0.5 22.202          N
## 3 2006-06-27  2006     6    178   1.0 22.244          N
## 4 2006-06-27  2006     6    178   1.5 22.219          N
## 5 2006-06-27  2006     6    178   2.0 22.259          N
## 6 2006-06-27  2006     6    178   2.5 22.214          N
nolegend <- theme(legend.position="none")
Jdays <- c(172,178,185,199,206,214)
# Subset all wtemp at or below 1.5 meters (sample depth up to shore). 
clado.depth <- subset(data, depth<=1.5, select = year4:wtemp)
# All years (2006-2016), full year. 
p <- qplot(clado.depth$daynum, y = clado.depth$wtemp)
p <- p + geom_point(aes(colour = clado.depth$year4), size=1, alpha = 0.8) + 
  scale_y_continuous(limits = c(4,30))+
  xlab("Date") + ylab("Temperature °C")+ 
  geom_vline(xintercept=Jdays, colour="darkgreen", linetype = "longdash") +
  guides(color=guide_legend(title="Year")) + 
  scale_colour_gradient(low = "darkred")
p+ theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# All years (2006-2016), sample dates ± 25 days
clado.depth.zoom <- subset(clado.depth, daynum>=150 & daynum<=250, select = year4:wtemp)
p <- qplot(clado.depth.zoom$daynum, y = clado.depth.zoom$wtemp) + 
  scale_y_continuous(limits = c(10,30))+
  geom_point(aes(colour = clado.depth.zoom$year4), size=1, alpha = 0.8) + 
  labs(title = "Water Temperature <1.5m Depth, Lake Mendota", x="Date", y="Temperature °C") + 
  geom_vline(xintercept=Jdays, colour="darkgreen", linetype = "longdash") + 
  guides(color=guide_legend(title="Year")) + scale_colour_gradient(low = "darkred")
p+ theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Smarter way to make grid of plots. 
p <- ggplot(clado.depth.zoom, aes(daynum, wtemp))
p <- p + geom_point(size = 0.5) + 
  ylim(15,30) + 
  facet_wrap(~year4, ncol = 3) + 
  geom_vline(xintercept=Jdays, colour="grey30", linetype = "longdash") +
  guides(color=guide_legend(title="Year")) +
  labs(title = "Water Temperature <1.5m Depth, Lake Mendota", x="Date", y="Temperature °C")
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

clado.depth.not14 <- subset(clado.depth, year4!=2014)
clado.depth.est.not14 <- summarySE(clado.depth.not14, measurevar= "wtemp", groupvars=c("daynum"))
head(clado.depth.est.not14)
##   daynum N   wtemp          sd          se          ci
## 1     89 4 5.18050 0.039076847 0.019538424 0.062179984
## 2     90 4 4.85075 0.015283433 0.007641717 0.024319353
## 3     91 4 4.86175 0.012711543 0.006355772 0.020226902
## 4     92 4 5.02750 0.007852813 0.003926406 0.012495577
## 5     93 4 5.11150 0.024365276 0.012182638 0.038770591
## 6     94 4 5.12750 0.005066228 0.002533114 0.008061499
p <- ggplot(clado.depth.est.not14, aes(x=daynum, y=wtemp)) + 
  geom_point() +  
  geom_errorbar(aes(ymin=wtemp-se, ymax=wtemp+se)) + geom_line()+
  geom_vline(xintercept=Jdays, colour="darkgreen", linetype = "longdash") + 
  labs(x="Date", y="Temperature °C at depths <1.5m")
p + theme_bw() + 
  scale_y_continuous(limits = c(10,26))+
  scale_x_continuous(limits = c(120,320))

  theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))
## List of 2
##  $ axis.text.x:List of 10
##   ..$ family    : NULL
##   ..$ face      : NULL
##   ..$ colour    : NULL
##   ..$ size      : num 10
##   ..$ hjust     : num 1
##   ..$ vjust     : NULL
##   ..$ angle     : num 45
##   ..$ lineheight: NULL
##   ..$ margin    : NULL
##   ..$ debug     : NULL
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.y:List of 10
##   ..$ family    : NULL
##   ..$ face      : NULL
##   ..$ colour    : NULL
##   ..$ size      : num 10
##   ..$ hjust     : NULL
##   ..$ vjust     : NULL
##   ..$ angle     : NULL
##   ..$ lineheight: NULL
##   ..$ margin    : NULL
##   ..$ debug     : NULL
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE
# Find genera of interest and subset biom.relabund. 
genofint <- read.table(file ="taxa-of-interest/genera-of-interest.txt")
genofint.list <- as.vector(genofint$V1)
biom.relabund.subset.genofint = subset_taxa(biom.relabund, Genus %in% genofint.list)
biom.relabund.subset.genofint.taxa <- subset_taxa(biom.relabund.subset.genofint, Genus %in% as.factor(genofint.list))
biom.relabund.subset.genofint.taxa
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 6648 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 6648 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 6648 tips and 6646 internal nodes ]
relabund.genofint <- psmelt(biom.relabund.subset.genofint.taxa)
relabund.genofint.genus <- relabund.genofint%>%
  group_by(Sample, Genus)%>%
  mutate(GenusAbundance = sum(Abundance))%>%
  distinct(Sample, GenusAbundance, TreatmentGroup, Site, Date, Family, Genus)
head(relabund.genofint.genus)
## Source: local data frame [6 x 9]
## Groups: Sample, Genus [6]
## 
##   Sample GenusAbundance TreatmentGroup   Site   Date          Family
##    <chr>          <dbl>         <fctr> <fctr> <fctr>          <fctr>
## 1 C178S2     0.14728018          Early  South    178 Crenotrichaceae
## 2 C206N2     0.07541493           Late  North    206      Thermaceae
## 3 C199S1     0.12408425           Late  South    199 Crenotrichaceae
## 4 C185S1     0.12728636          Early  South    185 Crenotrichaceae
## 5 C199S3     0.10486769           Late  South    199 Crenotrichaceae
## 6 C172S1     0.10894857          Early  South    172   Cytophagaceae
## # ... with 3 more variables: Genus <fctr>, Sample <chr>, Genus <fctr>
# Summary of genus abundance of genera of interest. 
relabund.genofint.genus.est <- summarySE(relabund.genofint.genus, measurevar="GenusAbundance", groupvars=c("Site","Date","Genus"))
head(relabund.genofint.genus.est)
##    Site Date          Genus N GenusAbundance           sd           se
## 1 North  172    Armatimonas 3   0.0082179013 0.0037601213 0.0021709070
## 2 North  172   Bdellovibrio 3   0.0023540936 0.0001963925 0.0001133873
## 3 North  172     Crenothrix 3   0.0008092387 0.0004823472 0.0002784833
## 4 North  172 Flavobacterium 3   0.0853337233 0.0369814367 0.0213512424
## 5 North  172 Flectobacillus 3   0.0109642505 0.0028549117 0.0016482840
## 6 North  172     Fluviicola 3   0.0032900333 0.0012430598 0.0007176809
##             ci
## 1 0.0093406591
## 2 0.0004878661
## 3 0.0011982168
## 4 0.0918669814
## 5 0.0070919938
## 6 0.0030879318
relabund.genofint.genus.est$Date <- as.character(relabund.genofint.genus.est$Date)
relabund.genofint.genus.est$Date <- as.numeric(relabund.genofint.genus.est$Date)
# Plot summary of genus abundance of genera of interest. 
p <- ggplot(relabund.genofint.genus.est, aes(x=Date, y=GenusAbundance, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=GenusAbundance-se, ymax=GenusAbundance+se)) +  geom_line() + facet_wrap(~Genus, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10)) + ylab("Relative Abundance")

# Find classes_zulk of interest and subset biom.relabund. 
classzulk <- read.csv(file = "taxa-of-interest/classes-zulkifly.csv", header = T)
classzulk.list <- as.vector(classzulk$Class)
biom.relabund.subset.classzulk = subset_taxa(biom.relabund, Class %in% classzulk.list)
biom.relabund.subset.classzulk.taxa <- subset_taxa(biom.relabund.subset.classzulk, Class %in% as.factor(classzulk.list))
biom.relabund.subset.classzulk.taxa
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 37840 taxa and 52 samples ]
## sample_data() Sample Data:       [ 52 samples by 6 sample variables ]
## tax_table()   Taxonomy Table:    [ 37840 taxa by 8 taxonomic ranks ]
## phy_tree()    Phylogenetic Tree: [ 37840 tips and 37826 internal nodes ]
relabund.classzulk <- psmelt(biom.relabund.subset.classzulk.taxa)
relabund.classzulk.class <- relabund.classzulk%>%
  group_by(Sample, Class)%>%
  mutate(ClassAbundance = sum(Abundance))%>%
  distinct(Sample, ClassAbundance, TreatmentGroup, Site, Date, Phylum, Family, Class)
length(relabund.classzulk.class$Class); head(relabund.classzulk.class)
## [1] 10816
## Source: local data frame [6 x 10]
## Groups: Sample, Class [6]
## 
##   Sample ClassAbundance TreatmentGroup   Site   Date         Phylum
##    <chr>          <dbl>         <fctr> <fctr> <fctr>         <fctr>
## 1 C206N3      0.2638815           Late  North    206 Proteobacteria
## 2 C178S2      0.1751127          Early  South    178 Proteobacteria
## 3 C206N2      0.1210454           Late  North    206       [Thermi]
## 4 C199S1      0.1578279           Late  South    199 Proteobacteria
## 5 C185S1      0.1641425          Early  South    185 Proteobacteria
## 6 C178N2      0.2485806          Early  North    178  Bacteroidetes
## # ... with 4 more variables: Family <fctr>, Class <fctr>, Sample <chr>,
## #   Class <fctr>
relabund.classzulk.class <- subset(relabund.classzulk.class, Class!="Chloroplast")
length(relabund.classzulk.class$Class); head(relabund.classzulk.class)
## [1] 10816
## Source: local data frame [6 x 10]
## Groups: Sample, Class [6]
## 
##   Sample ClassAbundance TreatmentGroup   Site   Date         Phylum
##    <chr>          <dbl>         <fctr> <fctr> <fctr>         <fctr>
## 1 C206N3      0.2638815           Late  North    206 Proteobacteria
## 2 C178S2      0.1751127          Early  South    178 Proteobacteria
## 3 C206N2      0.1210454           Late  North    206       [Thermi]
## 4 C199S1      0.1578279           Late  South    199 Proteobacteria
## 5 C185S1      0.1641425          Early  South    185 Proteobacteria
## 6 C178N2      0.2485806          Early  North    178  Bacteroidetes
## # ... with 4 more variables: Family <fctr>, Class <fctr>, Sample <chr>,
## #   Class <fctr>
# Summary of class abundance of classes_zulk of interest. 
relabund.classzulk.class.est <- summarySE(relabund.classzulk.class, measurevar ="ClassAbundance", groupvars=c("Site","Date","Phylum","Class"))
length(relabund.classzulk.class.est$Class)
## [1] 540
relabund.classzulk.class.est$Date <- as.character(relabund.classzulk.class.est$Date)
relabund.classzulk.class.est$Date <- as.numeric(relabund.classzulk.class.est$Date)
# Plot MERGED summary of class abundance of classes_zulk of interest and Zulkifly et al. class abundances. 
relabund.classzulk.class.est.merge <- merge(relabund.classzulk.class.est, classzulk, by = "Class")
head(relabund.classzulk.class.est.merge)
##                  Class  Site Date        Phylum N ClassAbundance
## 1 [Chloracidobacteria] North  206 Acidobacteria 9     0.02491042
## 2 [Chloracidobacteria] North  214 Acidobacteria 9     0.02066233
## 3 [Chloracidobacteria] South  214 Acidobacteria 9     0.01934126
## 4 [Chloracidobacteria] North  185 Acidobacteria 9     0.03053088
## 5 [Chloracidobacteria] North  178 Acidobacteria 9     0.03048632
## 6 [Chloracidobacteria] South  199 Acidobacteria 9     0.01520458
##             sd           se           ci ClassAbundance_zulkifly
## 1 0.0030860368 0.0010286789 0.0023721379                       0
## 2 0.0017004535 0.0005668178 0.0013070843                       0
## 3 0.0007875231 0.0002625077 0.0006053439                       0
## 4 0.0018881869 0.0006293956 0.0014513889                       0
## 5 0.0061337576 0.0020445859 0.0047148235                       0
## 6 0.0015427929 0.0005142643 0.0011858956                       0
p <- ggplot(relabund.classzulk.class.est.merge, aes(x=Date, y=ClassAbundance, color = Site)) + geom_point() +  geom_errorbar(aes(ymin=ClassAbundance-se, ymax=ClassAbundance+se)) +  geom_line() + facet_wrap(~Class, ncol = 3, scales="free_y") + scale_colour_hue(h=c(400, 120)) + geom_hline(aes(yintercept = ClassAbundance_zulkifly), linetype="dashed")+ scale_colour_hue(h=c(400, 120))
p + theme_bw() + theme(axis.text.x = element_text(size = 10, angle = 45, hjust=1),axis.text.y = element_text(size = 10))

# Summary of class abundance of classes_zulk of interest. 
relabund.classzulk.class.est.nosplit <- summarySE(relabund.classzulk.class.est, measurevar ="ClassAbundance", groupvars=c("Phylum","Class"))
head(relabund.classzulk.class.est.nosplit)
##           Phylum                Class  N ClassAbundance          sd
## 1       [Thermi]           Deinococci 18    0.058402291 0.022587941
## 2  Acidobacteria [Chloracidobacteria] 18    0.023908228 0.008663260
## 3  Acidobacteria      Acidobacteria-6 18    0.008565357 0.006008876
## 4  Acidobacteria         Solibacteres 18    0.011663895 0.008689581
## 5 Actinobacteria       Acidimicrobiia 18    0.014017705 0.010998519
## 6 Actinobacteria       Actinobacteria 18    0.000871626 0.001038802
##             se          ci
## 1 0.0053240287 0.011232719
## 2 0.0020419499 0.004308138
## 3 0.0014163056 0.002988144
## 4 0.0020481538 0.004321227
## 5 0.0025923759 0.005469435
## 6 0.0002448479 0.000516584
relabund.classzulk.class.est.merge.nosplit <- merge(relabund.classzulk.class.est.nosplit, classzulk, by ="Class")
relabund.classzulk.class.est.merge.nosplit
##                    Class           Phylum  N ClassAbundance           sd
## 1   [Chloracidobacteria]    Acidobacteria 18   0.0239082284 0.0086632598
## 2         [Pedosphaerae]  Verrucomicrobia 18   0.0008585850 0.0006050463
## 3          [Saprospirae]    Bacteroidetes 18   0.2606628866 0.0436653804
## 4         Acidimicrobiia   Actinobacteria 18   0.0140177053 0.0109985194
## 5        Acidobacteria-6    Acidobacteria 18   0.0085653572 0.0060088756
## 6         Actinobacteria   Actinobacteria 18   0.0008716260 0.0010388017
## 7    Alphaproteobacteria   Proteobacteria 18   0.1193086584 0.0258636402
## 8          Armatimonadia  Armatimonadetes 18   0.0043316962 0.0031456709
## 9            Bacteroidia    Bacteroidetes 18   0.0015860106 0.0013530463
## 10                 BD1-5             GN02 18   0.0036777802 0.0020102991
## 11    Betaproteobacteria   Proteobacteria 18   0.1382959544 0.0300836285
## 12            Clostridia       Firmicutes 18   0.0007966420 0.0012419290
## 13            Cytophagia    Bacteroidetes 18   0.0471742356 0.0352465522
## 14            Deinococci         [Thermi] 18   0.0584022914 0.0225879408
## 15   Deltaproteobacteria   Proteobacteria 18   0.0202432199 0.0092228614
## 16        Flavobacteriia    Bacteroidetes 18   0.0496626064 0.0197355920
## 17   Gammaproteobacteria   Proteobacteria 18   0.0667539305 0.0486753799
## 18      Gemmatimonadetes Gemmatimonadetes 18   0.0019379966 0.0010921809
## 19            Nitrospira      Nitrospirae 18   0.0003613375 0.0003812892
## 20                 OM190   Planctomycetes 18   0.0040188935 0.0023178795
## 21                 OPB56         Chlorobi 18   0.0008028522 0.0009068642
## 22         Phycisphaerae   Planctomycetes 18   0.0017869873 0.0018118456
## 23        Planctomycetia   Planctomycetes 18   0.0068856180 0.0034767750
## 24          Solibacteres    Acidobacteria 18   0.0116638949 0.0086895806
## 25      Sphingobacteriia    Bacteroidetes 18   0.0073246136 0.0036968485
## 26 Synechococcophycideae    Cyanobacteria 18   0.0356335002 0.0400623018
## 27                  TA18   Proteobacteria 18   0.0015533374 0.0010169741
## 28                 TM7-1              TM7 18   0.0018127999 0.0012590898
## 29      Verrucomicrobiae  Verrucomicrobia 18   0.0027409754 0.0017111353
## 30                   ZB2              OD1 18   0.0007276926 0.0005640772
##              se           ci ClassAbundance_zulkifly
## 1  2.041950e-03 0.0043081377                    0.00
## 2  1.426108e-04 0.0003008824                    0.00
## 3  1.029203e-02 0.0217142828                    0.00
## 4  2.592376e-03 0.0054694350                    0.00
## 5  1.416306e-03 0.0029881435                    0.00
## 6  2.448479e-04 0.0005165840                    0.00
## 7  6.096118e-03 0.0128616857                    0.17
## 8  7.414417e-04 0.0015643053                    0.00
## 9  3.189161e-04 0.0006728541                    0.01
## 10 4.738320e-04 0.0009996982                    0.00
## 11 7.090779e-03 0.0149602365                    0.21
## 12 2.927255e-04 0.0006175968                    0.00
## 13 8.307692e-03 0.0175276980                    0.00
## 14 5.324029e-03 0.0112327187                    0.03
## 15 2.173849e-03 0.0045864211                    0.07
## 16 4.651724e-03 0.0098142790                    0.07
## 17 1.147290e-02 0.0242056969                    0.12
## 18 2.574295e-04 0.0005431288                    0.01
## 19 8.987072e-05 0.0001896106                    0.01
## 20 5.463294e-04 0.0011526543                    0.00
## 21 2.137499e-04 0.0004509730                    0.00
## 22 4.270561e-04 0.0009010096                    0.00
## 23 8.194837e-04 0.0017289595                    0.05
## 24 2.048154e-03 0.0043212268                    0.00
## 25 8.713555e-04 0.0018383995                    0.16
## 26 9.442775e-03 0.0199225140                    0.00
## 27 2.397031e-04 0.0005057293                    0.00
## 28 2.967703e-04 0.0006261306                    0.00
## 29 4.033185e-04 0.0008509276                    0.04
## 30 1.329543e-04 0.0002805090                    0.00
df <- relabund.classzulk.class.est.merge.nosplit
limits <- aes(ymin= ClassAbundance-ci, ymax= ClassAbundance+ci)
p <- ggplot(df, aes(Class))
p <- p + 
  ylab("Relative Abundance")+
  facet_grid(~Phylum, scales = "free", space="free") +
  guides(color=guide_legend(title="Growth Season")) + 
  theme_bw() + 
  theme(legend.position="top")+
  theme(strip.text.x = element_text(size = 8, colour = "blue", angle = 90))+
  theme(axis.text.x = element_text(size = 10, angle = 55, hjust=1),axis.text.y = element_text(size = 10))+
  geom_point(aes(y = ClassAbundance, color = "Braus 2016 (2014)"), size = 3, alpha = 0.5) + 
  geom_errorbar(limits, color = "darkred", width=0.5) +
  geom_point(aes(y = ClassAbundance_zulkifly, color = "Zulkifly 2012 (2011)"), size = 3, alpha = 0.5)
p